#### The input for Seurat is an FPKM table generated using Cuffnorm. For combined signals, the input consists of the FPKM values of the significant NMF genes together with the raw depth of coverage of each cell over the significant peaks as determined by HAR.

library(Seurat) 
# Legacy Seurat workflow: load the expression table, build the object, run
# PCA / jackStraw, and compute a t-SNE embedding.

# Tab-separated input with a header row; the first column supplies row names.
nbt.data <- read.table("./Input.txt", sep = "\t", header = TRUE, row.names = 1)
# Log-transform with a pseudocount of 1.
nbt.data <- log(nbt.data + 1)
# Quick look at the table and its dimensions.
corner(nbt.data)
dim(nbt.data)

# Build the Seurat object from the log-transformed table and apply the
# filtering thresholds below. (The original call ended with a stray trailing
# comma, which passed an empty argument; it has been removed.)
nbt <- new("seurat", raw.data = nbt.data)
nbt <- setup(
  nbt,
  project = "Combined",
  min.cells = 3,
  names.field = 1,
  names.delim = "_",
  min.genes = 1000,
  is.expr = 0.5
)
nbt

# Mean/variance plot with the given cutoffs, then report how many variable
# genes are stored on the object.
nbt <- mean.var.plot(
  nbt,
  y.cutoff = 1.5,
  x.low.cutoff = 1.5,
  fxn.x = expMean,
  fxn.y = logVarDivMean
)
length(nbt@var.genes)

# PCA, a PC1-vs-PC2 plot, and a jackStraw run with 200 replicates.
nbt <- pca(nbt, do.print = FALSE)
pca.plot(nbt, 1, 2, pt.size = 2)
nbt <- jackStraw(nbt, num.replicate = 200, do.print = FALSE)
jackStrawPlot(nbt, PCs = 1:15)

# t-SNE on the first three dimensions.
nbt <- run_tsne(nbt, dims.use = 1:3, max_iter = 2000)
tsne.plot(nbt, pt.size = 1)

# NOTE(review): PCA is run a second time here with the same arguments as
# above; kept as in the original workflow -- confirm whether it is needed.
nbt <- pca(nbt, do.print = FALSE)

### When the number of variable genes is low, projecting the PCA may be required; in that case, run the following steps:
# Project the PCA and re-run jackStraw (200 replicates) on the result.
nbt <- project.pca(nbt, do.print = FALSE)
nbt <- jackStraw(nbt, num.replicate = 200, do.print = FALSE)

# Collect the significant genes for PCs 1-2 (p-value cutoff 1e-5, at most
# 200 genes per PC) and report how many were found.
nbt.sig.genes <- pca.sig.genes(
  nbt,
  1:2,
  pval.cut = 1e-5,
  max.per.pc = 200
)
length(nbt.sig.genes)

# Redo the PCA using only those genes, then re-run and plot jackStraw.
nbt <- pca(nbt, pc.genes = nbt.sig.genes, do.print = FALSE)
nbt <- jackStraw(nbt, num.replicate = 200, do.print = FALSE)
jackStrawPlot(nbt, PCs = 1:15)


######## For the UMAP plots, a newer version of Seurat was used.
######## The input for this analysis was the raw counts generated using featureCounts.

library(dplyr)
library(Seurat)


# Read the per-batch count tables (tab-separated, header row, first column
# supplies row names). `header` is spelled out in full rather than relying on
# partial argument matching of `head`.
Batch1.data <- read.table("Batch1_Counts.txt", sep = "\t", header = TRUE, row.names = 1)
Batch2.data <- read.table("Batch2_Counts.txt", sep = "\t", header = TRUE, row.names = 1)

# One Seurat object per batch, using the same filtering thresholds for both.
Batch1 <- CreateSeuratObject(
  counts = Batch1.data,
  project = "BATCH1",
  min.cells = 3,
  min.features = 200
)
Batch2 <- CreateSeuratObject(
  counts = Batch2.data,
  project = "BATCH2",
  min.cells = 3,
  min.features = 200
)

# Merge the two batches, prefixing cell names with their batch id.
Dux <- merge(
  Batch1,
  y = Batch2,
  add.cell.ids = c("Batch1", "Batch2"),
  project = "Dux"
)

# Per-cell batch labels, read as whitespace-separated strings.
Batch <- scan("Batch.txt", what = "")

# Fail fast on a length mismatch: data-frame `$<-` silently recycles a vector
# whose length divides the row count, which would mislabel cells.
if (length(Batch) != nrow(Dux@meta.data)) {
  stop(
    "Batch.txt contains ", length(Batch), " labels but the merged object has ",
    nrow(Dux@meta.data), " cells",
    call. = FALSE
  )
}
# NOTE(review): labels are assigned by position; this assumes Batch.txt lists
# the cells in the same order as the merged object -- confirm.
Dux@meta.data$Batch <- Batch

# QC violin plots of nFeature_RNA and nCount_RNA per batch, before filtering.
# The plot is wrapped in print() so it is drawn even when the script is run
# via source(), where top-level values are not auto-printed.
pdf("Dux_QC_BF.pdf", height = 6, width = 9)
print(
  VlnPlot(
    Dux,
    features = c("nFeature_RNA", "nCount_RNA"),
    ncol = 2,
    group.by = "Batch"
  )
)
dev.off()

# Keep only cells with nFeature_RNA strictly between 1500 and 3000.
Dux <- subset(Dux, subset = nFeature_RNA > 1500 & nFeature_RNA < 3000)

# Same QC violin plots after filtering.
pdf("Dux_QC_AF.pdf", height = 6, width = 9)
print(
  VlnPlot(
    Dux,
    features = c("nFeature_RNA", "nCount_RNA"),
    ncol = 2,
    group.by = "Batch"
  )
)
dev.off()
library(sctransform)

# SCTransform with "Batch" passed as a variable to regress, followed by PCA.
Dux <- SCTransform(Dux, vars.to.regress = "Batch", verbose = TRUE)
Dux <- RunPCA(Dux)

# Elbow plot written to PDF. print() is explicit so the page is not left
# blank when the script is run via source() (no top-level auto-printing).
pdf("Dux_ElbowPlot.pdf", height = 6, width = 6)
print(ElbowPlot(Dux))
dev.off()


# UMAP embedding, neighbour graph and clustering, all on the first three
# dimensions; clustering uses resolution 0.3.
Dux <- RunUMAP(Dux, dims = 1:3)
Dux <- FindNeighbors(Dux, dims = 1:3)
Dux <- FindClusters(Dux, resolution = 0.3)

# Each plot below is wrapped in print() so it is drawn even when the script
# is run via source(), where top-level values are not auto-printed.

# UMAP without cluster labels.
pdf("Dux_UMAP_Clusters.pdf", height = 6, width = 7)
print(DimPlot(Dux, reduction = "umap"))
dev.off()

# UMAP with cluster labels. The DimPlot argument is `label`; the original
# passed `labels`, which is not a DimPlot parameter, so the cluster labels
# were not requested as intended.
pdf("Dux_UMAP_Clusters_with_labels.pdf", height = 6, width = 7)
print(DimPlot(Dux, reduction = "umap", label = TRUE))
dev.off()

# QC violin plots after clustering (no group.by is given here).
pdf("Dux_QC_After_Clustering.pdf", height = 6, width = 9)
print(VlnPlot(Dux, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2))
dev.off()


# Switch to the "RNA" assay, normalise it, and scale every gene with "Batch"
# passed as a variable to regress.
DefaultAssay(Dux) <- "RNA"
Dux <- NormalizeData(Dux)
all.genes <- rownames(Dux)
Dux <- ScaleData(
  Dux,
  features = all.genes,
  vars.to.regress = "Batch"
)

# Positive markers only, using the "roc" test; written as a tab-separated table.
Dux.markers <- FindAllMarkers(
  Dux,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25,
  test.use = "roc"
)
write.table(
  Dux.markers,
  file = "Dux_Markers_AUC.txt",
  sep = "\t",
  quote = FALSE,
  col.names = NA
)

# Same marker search with the default test of FindAllMarkers.
Dux.markers.DGE <- FindAllMarkers(
  Dux,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)
write.table(
  Dux.markers.DGE,
  file = "Dux_Markers_DGE.txt",
  sep = "\t",
  quote = FALSE,
  col.names = NA
)

# Export the per-cell metadata table.
write.table(
  Dux@meta.data,
  file = "Dux_MetaData.txt",
  sep = "\t",
  quote = FALSE,
  col.names = NA
)
